import urllib.request
from bs4 import BeautifulSoup
import glob
import jieba
import re
import requests



# Default HTTP headers for the requests-based helpers in this module:
# a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36'
    ),
}

# Default query-string parameters for the requests-based helpers (none).
params = {}

# Difference between urlopen() and get():
# https://blog.csdn.net/xiangxianghehe/article/details/55803584

class MyHttp:
    """Static helpers that download a URL and return raw bytes or parsed HTML.

    Two transports are used: ``urllib.request.urlopen`` (``http_json_data``,
    ``bs4_utf8_data``, ``bs4_gb2312_data``) and ``requests`` (the ``_v2``
    methods and ``bs4_utf8_post_data``). The requests-based methods send the
    module-level ``headers`` and, for GET, the module-level ``params``.
    HTML is parsed with BeautifulSoup using the ``html5lib`` parser.
    """

    # Timeout handed to urllib.request.urlopen, which interprets it in seconds.
    # NOTE(review): 5000 s is roughly 83 minutes; this looks like it was meant
    # as milliseconds. Kept as-is for backward compatibility -- confirm with
    # callers before lowering it.
    _URLOPEN_TIMEOUT = 5000

    # Timeout (seconds) for every requests-based call.
    _REQUESTS_TIMEOUT = 5

    @staticmethod
    def _urlopen_read(url, timeout):
        """Return the whole response body of *url* as bytes using urllib."""
        # The context manager closes the response even when read() raises,
        # which the previous manual web.close() did not guarantee.
        with urllib.request.urlopen(url, timeout=timeout) as web:
            return web.read()

    @staticmethod
    def _proxy_map(proxy):
        """Build the ``proxies`` mapping for requests; same proxy for both schemes."""
        return {
            "http": proxy,
            "https": proxy,
        }

    @staticmethod
    def _requests_get_bytes(url, proxy):
        """GET *url* with the module-level params/headers and return the body bytes."""
        response = requests.get(
            url,
            params=params,
            headers=headers,
            timeout=MyHttp._REQUESTS_TIMEOUT,
            proxies=MyHttp._proxy_map(proxy),
        )
        return response.content

    @staticmethod
    def http_json_data(url):
        """Print *url*, fetch it with urllib and return the raw body as bytes.

        The body is returned undecoded and unparsed.
        """
        print(url)
        return MyHttp._urlopen_read(url, MyHttp._URLOPEN_TIMEOUT)

    @staticmethod
    def http_json_data_v2(url, proxy = ""):
        """Print *url*, fetch it with requests and return the raw body as bytes.

        Args:
            url: Address to GET.
            proxy: Proxy URL used for both the "http" and "https" schemes;
                the default empty string is passed through to requests as-is.
        """
        print(url)
        return MyHttp._requests_get_bytes(url, proxy)

    @staticmethod
    def bs4_utf8_data(url, time_wait = 5000):
        """Fetch *url* with urllib, decode it as UTF-8 and return the parsed soup.

        Args:
            url: Address to open.
            time_wait: Timeout passed to ``urlopen`` (seconds).

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        data = MyHttp._urlopen_read(url, time_wait)
        return BeautifulSoup(data.decode("utf-8"), "html5lib")

    @staticmethod
    def bs4_utf8_post_data(url, payload, timeout = 5):
        """POST *payload* to *url*, print the response text and return the parsed soup.

        Args:
            url: Address to POST to.
            payload: Form data forwarded as ``data=`` to ``requests.post``.
            timeout: Seconds before requests gives up. Previously no timeout
                was set, so a stalled server could block the caller forever;
                the default matches the GET helpers.
        """
        data = requests.post(url, data=payload, headers=headers, timeout=timeout).text
        print(data)
        return BeautifulSoup(data, "html5lib")

    @staticmethod
    def bs4_utf8_data_v2(url, proxy = ""):
        """Fetch *url* with requests, decode it as UTF-8 and return the parsed soup.

        Args:
            url: Address to GET.
            proxy: Proxy URL used for both the "http" and "https" schemes.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        data = MyHttp._requests_get_bytes(url, proxy).decode("utf-8")
        return BeautifulSoup(data, "html5lib")

    @staticmethod
    def bs4_utf8_data_v2_with_proxy(url, proxy = ""):
        """Same as ``bs4_utf8_data_v2`` but also prints the decoded text length.

        Args:
            url: Address to GET.
            proxy: Proxy URL used for both the "http" and "https" schemes.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        data = MyHttp._requests_get_bytes(url, proxy).decode("utf-8")
        print("len:" + str(len(data)))
        return BeautifulSoup(data, "html5lib")

    @staticmethod
    def bs4_gb2312_data(url):
        """Fetch *url* with urllib, decode it with the "gbk" codec and return the soup.

        Raises:
            UnicodeDecodeError: If the body cannot be decoded as GBK.
        """
        data = MyHttp._urlopen_read(url, MyHttp._URLOPEN_TIMEOUT)
        return BeautifulSoup(data.decode("gbk"), "html5lib")